******************************************************************************************************************************************* fragment
****** LOAD DATABASE
* Start from an empty session, move to the project folder and open the
* member-by-choice-set file.
set more off
clear all
set matsize 11000
cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles"
use database_by_members_n_choiceset.dta


****** CLEAN DATABASE VARIABLES THAT WE DON'T NEED
* Discard the rx_dum variables, put the identifier columns first and sort on them.
drop rx_dum*
order customer_number minor_market date_month alt_id
sort  customer_number minor_market date_month alt_id
* Retain only rows with choice equal to 1 and a non-empty alt_id.
keep if choice==1 & alt_id!=""

****** CREATE VARIABLES FOR EXPENDITURE
* paid_sum / allowed_sum add the pharmacy amount to net_pd_amt / allw_amt.
* The out-of-pocket variables are "allowed minus paid"; outofpocketmedical is
* copied from oop_dircomputed, and outpocket adds the pharmacy difference to
* (allw_amt - net_pd_amt).
generate paid_sum           = paid_pharmacy + net_pd_amt
generate allowed_sum        = allw_amt + allowed_pharma
generate outofpocketpharma  = allowed_pharma - paid_pharmacy
generate outofpocketmedical = oop_dircomputed
generate outpocket          = (allowed_pharma - paid_pharmacy) + (allw_amt - net_pd_amt)

**** MERGING WITH RISK SCORES DATABASE
* Attach the risk-score file by member (mbr_sys_id) and year.
* NOTE(review): this is the old-style merge syntax; it presumably relies on the
* using file already being sorted on mbr_sys_id year - confirm.
sort mbr_sys_id year
merge mbr_sys_id year using NEW_riskscores_July2017_inc2015
*merge mbr_sys_id year using NEW_riskscores_July2017.dta
* Rows that exist only in the risk-score file survive only when year is 2014.
keep if _merge!=2 | year==2014
drop _merge

****** CREATE OTHER AUXILIARY VARIABLES
* Indicator sets for year and market, plus a female dummy (gdr_cd equal to "F").
tabulate year, generate(yeardum)
generate female = (gdr_cd=="F")
tabulate minor_market, generate(marketdum)
generate aux = 1
* create the number of people per firm and per plan (row counts within
* customer_number-year and within customer_number-year-polnbr)
bysort customer_number year        : egen numpeople     = total(aux)
bysort customer_number year polnbr : egen numpeopleplan = total(aux)
* Compute the average premium per person at firm-plan level:
* total_premium times 12, divided by the plan head count
generate av_premiumpc_plan = (total_premium*12)/numpeopleplan
****************************
* Panel identifier: one value per customer_number x mbr_sys_id pair.
* Every identifier-year that occurs more than once is removed entirely.
egen auxgroup = group(customer_number mbr_sys_id)
duplicates tag auxgroup year, generate(check)
keep if check==0
drop check
* Take the one-year lag of the ACG risk score within each panel unit and
* drop observations whose lagged score is missing
xtset auxgroup year
generate laggedscore = L.acg_riskscore
keep if laggedscore != .
**********************************************************************************************

* Firm-year means of the per-person lagged risk score and per-person premium
bysort customer_number year : egen mean_pred_riskscore = mean(laggedscore)
bysort customer_number year : egen mean_premium        = mean(av_premiumpc_plan)

* Name lists: the grouping keys and the variables to be averaged
local groupvars    "customer_number  year  minor_market"
local collapsevars " mean_pred_riskscore mean_premium numpeople code_*"
* Reduce the data to one row per customer_number-year-minor_market
keep `groupvars' `collapsevars'
collapse (mean) `collapsevars', by(`groupvars')


** AUXILIARY VARIABLES
* Numeric firm identifier for xtset
egen firm = group(customer_number)
* drop firms that are in two markets: any customer_number-year that appears
* on more than one row is removed
duplicates tag customer_number year, generate(tag)
keep if tag==0
tabulate year, generate(yeardum)

xtset firm year

* Firm fixed-effects regression of mean premium on mean lagged risk score
* with year dummies, on all remaining firm-years
xtreg mean_premium mean_pred_riskscore yeardum*, fe vce(cluster customer_number)

* Keep only firms observed in at least two rows, then re-estimate
bysort customer_number: generate periods = _N
keep if periods>=2

xtreg mean_premium mean_pred_riskscore yeardum*, fe vce(cluster customer_number)
* NOTE(review): the variable sample is created here but never used - the
* rows saved below are not restricted to the estimation sample. Confirm
* whether a filter on it was intended.
generate sample = e(sample)

generate firmstays = 1

* Save the list of retained firm-years
keep customer_number minor_market year firmstays
sort customer_number year
*duplicates drop 
save firmsinsample_July2017.dta, replace


* The do-file stops here; nothing below this line runs in a normal execution.
exit

/*
cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles"
use database_by_members_n_choiceset.dta, clear 
drop if polnbr=="0"
sort customer_number
drop if customer_number[_n]==customer_number[_n-1]
tempfile aux 
save `aux'
use firmsinsample_July2017.dta, clear
sort customer_number
merge customer_number using `aux'
drop if _merge!=3
drop if customer_number[_n]==customer_number[_n-1]
keep customer_number polnbr minor_market
sort polnbr
save firmsandmarkets.dta, replace 
*/

* Build firmsandmarkets.dta: one row per customer_number taken from the
* firm-year list saved in firmsinsample_July2017.dta.
* The option for discarding the data in memory on use is clear; replace is
* only valid on save, so the previous form stopped with an error.
use firmsinsample_July2017.dta, clear
* Keep the earliest year of each firm. The previous sort on customer_number
* alone left the order within a firm arbitrary, so the retained row (and its
* minor_market / year) could change from run to run.
bysort customer_number (year): keep if _n==1
save firmsandmarkets.dta, replace 




